{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "spark连接yarn测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
      "24/05/19 16:09:22 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
      "24/05/19 16:09:24 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.\n",
      "                                                                                \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pi is roughly 3.144040\n"
     ]
    }
   ],
   "source": [
    "from random import random\n",
    "from operator import add\n",
    "from pyspark.sql import SparkSession\n",
    "\n",
    "\n",
    "spark = SparkSession.builder.appName(\"pyspark-yarn\").master(\"yarn\").config(\"spark.submit.deployMode\", \"client\").getOrCreate()\n",
    "\n",
    "n = 100000 * 2\n",
    "\n",
    "def f(_):\n",
    "    x = random() * 2 - 1\n",
    "    y = random() * 2 - 1\n",
    "    return 1 if x ** 2 + y ** 2 <= 1 else 0\n",
    "\n",
    "count = spark.sparkContext.parallelize(range(1, n + 1), 2).map(f).reduce(add)\n",
    "print(\"Pi is roughly %f\" % (4.0 * count / n))\n",
    "\n",
    "spark.stop()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "spark连接yarn 读取hive测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "24/05/19 16:10:51 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.\n",
      "[Stage 1:>                                                          (0 + 1) / 1]\r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+---------+\n",
      "| id|     name|\n",
      "+---+---------+\n",
      "|  1|hamawhite|\n",
      "|  2|  song.bs|\n",
      "+---+---------+\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from pyspark import SparkContext, SparkConf\n",
    "from pyspark.conf import SparkConf\n",
    "from pyspark.sql import SparkSession, HiveContext\n",
    "\n",
    "\n",
    "spark = SparkSession.builder.master(\"yarn\").appName('pyspark-yarn-hive').config(\"hive.metastore.uris\", \"thrift://hive-service.default:9083\", conf=SparkConf()).enableHiveSupport().getOrCreate()\n",
    "        \n",
    "\n",
    "df_load = spark.sql('select * from demo')\n",
    "df_load.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
